In [23]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Try the current
# location first and fall back so the notebook still runs on legacy installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# joblib is a standalone package; the copy vendored as sklearn.externals.joblib
# was deprecated in scikit-learn 0.21 and removed in 0.23.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Disable pandas' chained-assignment (SettingWithCopy) warning notebook-wide.
pd.options.mode.chained_assignment = None

In [34]:
# Read the Titanic_train.csv file here

For this example, we use only two features: passenger class (`pclass`) and sex.

Extract them in the cells below.


In [26]:
# Extract the pclass and sex into a new Dataframe


Out[26]:
pclass sex
0 3rd female
1 1st male
2 2nd male
3 1st male
4 3rd male

In [27]:
# Convert pclass to pure numbers


Out[27]:
pclass sex
0 3 female
1 1 male
2 2 male
3 1 male
4 3 male

In [ ]:
# Replace the sex with 0 for female, 1 for male

In [29]:
# Create the expected-result (target) DataFrame, i.e. the labels the model is trained to predict.

In [30]:
# Create test/train split

In [31]:
# Create the random forest instance, and train it with training data


C:\st\Anaconda3\lib\site-packages\ipykernel\__main__.py:2: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  from ipykernel import kernelapp as app
Out[31]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [32]:
# Get the accuracy of your model


Out[32]:
0.80263157894736847

In [33]:
# Write the model to a file called "titanic_model2"


Out[33]:
['titanic_model2']

In [ ]: